/*
 * Copyright (c) 2021-2025 Symas Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * * Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following disclaimer
 *   in the documentation and/or other materials provided with the
 *   distribution.
 * * Neither the name of the Symas Corporation nor the names of its
 *   contributors may be used to endorse or promote products derived from
 *   this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <algorithm>
#include <cassert>
#include <cctype>
#include <cstdlib>
#include <cstring>

#include <sys/mman.h>

#ifndef _LEXIO_H_
#define _LEXIO_H_

// Fill character; erase_source() writes it over every erased byte.
#define SPACE ' '

// Declared here, defined elsewhere.
// NOTE(review): presumably reports whether the lexer echoes its input -- confirm.
bool lexer_echo();
// Declared here, defined elsewhere.  filespan_t::next_line() consults it
// before looking for a quote in column 72.
bool is_reference_format();

// True when ch is one of the two literal delimiters: apostrophe or
// double quote.
static inline bool isquote( char ch ) {
  switch( ch ) {
  case '\'':
  case '"':
    return true;
  default:
    return false;
  }
}

// Blank out the range [src, esrc) in place: every character except a
// newline is overwritten with SPACE, so the number and position of the
// newlines in the range are preserved.
static inline void
erase_source( char *src, char *esrc ) {
  for( char *p = src; p != esrc; ++p ) {
    if( *p != '\n' ) {
      *p = SPACE;
    }
  }
}

/*
   * Column number as in Cobol, with 1 at the start of the line.
   *  0: free-format, but comment lines may start with '*'.
   *  N: columns less than N treated as space.
   *     '/' or '*' in N starts a comment
   *     'D' starts a debug line
   *     '-' is a line-continuation indicator
   *     Others ignored.
   *  Right margin is enforced if it is greater than left margin.
   */
// A mutable character range [data, eodata).
struct bytespan_t {
  char *data, *eodata;

  // An empty span: both ends are null.
  bytespan_t() : data(nullptr), eodata(nullptr) {}

  // Span over [data, eodata).  When eodata precedes data, the end is
  // instead computed with strlen(), so data must be NUL-terminated in
  // that case.
  bytespan_t( char *data, char *eodata )
    : data(data)
    , eodata( eodata < data ? data + strlen(data) : eodata )
  {
    assert( this->data <= this->eodata );
  }

  // Number of characters in the span.
  size_t size() const { return eodata - data; }

  // True if the span ends inside an unterminated quoted literal.
  // The first quote character seen opens a literal; only the same
  // character closes it, and that character doubled while the literal
  // is open is skipped as a pair rather than treated as a close.
  bool in_string( ) const {
    char delimiter = '\0'; // quote that opened the literal, NUL if none

    char *pos = std::find_if(data, eodata, isquote);
    while( pos != eodata ) {
      if( delimiter == '\0' ) {
        delimiter = *pos;
      } else if( *pos == delimiter ) {
        const bool doubled = pos + 1 < eodata && pos[1] == delimiter;
        if( doubled ) {
          ++pos;             // step over the second quote of the pair
        } else {
          delimiter = '\0';  // matching quote closes the literal
        }
      }
      // A quote of the other kind inside an open literal is ignored.
      pos = std::find_if(pos + 1, eodata, isquote);
    }
    return isquote(delimiter);
  }

  // Declared only; the definition is not in this header.
  char * append( const char *input, const char *eoinput );

  // Re-point the span at [line, eoline).  If right_margin is nonzero and
  // the range extends past data + right_margin, the excess is blanked
  // with erase_source() and cut off.  Finally the span is shortened to
  // end at the first newline, if there is one.  Returns *this.
  bytespan_t&
  update( char *line, char *eoline, size_t right_margin ) {
    *this = bytespan_t(line, eoline);

    if( right_margin != 0 ) {
      char * const margin = data + right_margin;
      if( margin < eodata ) {
        erase_source(margin, eodata);
        eodata = margin;
      }
    }

    eodata = std::find(data, eodata, '\n');
    return *this;
  }
};

// YYLTYPE supplied by cbldiag.h.  Borrowed from parse.h as generated by Bison.

// A bytespan_t over a whole input, plus a cursor that walks it one line
// at a time: [cur, eol) is the current line, and iline/icol carry the
// line and column counters.
struct filespan_t : public bytespan_t {
  char *cur, *eol, *quote;
 private:
  size_t iline, icol;
  size_t line_quote72;   // value of iline when a quote was last seen in column 72
  static char empty_file[8];
 public:
  // Empty span; the cursor sits at the (null) start of data.
  filespan_t()
    : cur(data), eol(data), quote(nullptr), iline(0), icol(0), line_quote72(0)
  {}
  // Span over the len bytes starting at p; the cursor sits at the start.
  filespan_t(void *p, size_t len)
    : bytespan_t( static_cast<char*>(p), static_cast<char*>(p) + len )
    , cur(data), eol(data), quote(nullptr), iline(0), icol(0), line_quote72(0)
  {}

  size_t lineno() const { return iline; }
  size_t colno() const  { return icol; }

  void lineno_reset()  { iline = 0; }
  // Set the column counter and return the new value.
  size_t colno( size_t icol )  {
    this->icol = icol;
    return this->icol;
  }

  // True once use_nada() has pointed the span at the static empty_file.
  bool nada() const { return data == empty_file; }
  // Point a span that has no data at the static empty_file buffer.  The
  // current line and the span both cover all but its last byte.
  void use_nada() {
    assert(data == nullptr);
    char * const last = empty_file + sizeof(empty_file) - 1;
    data = cur = empty_file;
    eodata = eol = last;
  }

  const char *ccur() const { return cur; }

  /*
   * "If an alphanumeric or national literal that is to be continued on
   * the next line has as its last character a quotation mark in
   * column 72, the continuation line must start with two consecutive
   * quotation marks."
   */
  // True when the line counter is exactly one past the value recorded
  // by next_line() for a quote in column 72.
  // NOTE(review): iline and line_quote72 both start at 0, so this looks
  // like it is also true when iline == 1 with nothing recorded -- confirm
  // whether callers depend on that.
  bool was_quote72() const { return iline == line_quote72 + 1; }

  // Advance the cursor to the next line and return its length, including
  // the trailing newline if present; returns 0 at end of data.  The line
  // and column counters are updated only when the new line ends in a
  // newline.
  size_t next_line() {
    // Before advancing, mark the current line as ending in a quote, if true.
    if( is_reference_format() && line_length() >= 72 && isquote(cur[71]) ) {
      line_quote72 = iline;
    }

    cur = eol;
    assert(data <= cur && cur <= eodata);
    if( cur == eodata ) {
      return 0;
    }

    char * const newline = std::find(cur, eodata, '\n');
    if( newline == eodata ) {
      eol = eodata;        // final line has no terminating newline
    } else {
      eol = newline + 1;   // the line includes its newline
      ++iline;
      icol = 0;
    }
    return line_length();
  }

  // Length of the current line, [cur, eol).
  size_t line_length() const { return eol - cur; }

  // Declared only; the definition is not in this header.
  static size_t tab_check( const char *src, const char *esrc );

  // True if every character of the current line satisfies fisspace()
  // (vacuously true for an empty line).
  bool is_blank_line() const {
    return std::none_of( cur, eol,
                         []( char ch ) { return !fisspace(ch); } );
  }

  // Current position as a one-point location: first and last line are
  // 1 + iline, first and last column are 1 + icol.
  YYLTYPE as_location() const {
    YYLTYPE here;

    here.first_line = here.last_line = 1 + iline;
    here.first_column = here.last_column = 1 + icol;
    return here;
  }

};

#if USE_STD_REGEX
# include <regex>
#else
# include "dts.h"
using dts::csub_match;
using dts::cmatch;
using dts::regex;
using dts::regex_search;
#endif

// A read-only character range [p, pend).  The struct does not own the
// memory it points at; nothing here ever frees it.
struct span_t {
 protected:
  // Report (via dbgmsg) and then assert that p is non-null and does not
  // lie beyond pend.
  void verify() const {
    if( !p ) {
      dbgmsg("span_t::span_t: p is NULL");
    } else if( ! (p <= pend) ) {
      // %p expects a void pointer, so convert explicitly.
      dbgmsg("span_t::span_t: p %p > pend %p",
             static_cast<const void*>(p), static_cast<const void*>(pend));
    }
    assert(p && p <= pend);
  }
  // Shrink the span past leading and trailing blanks (space or tab).
  // The <cctype> classifiers are undefined for negative arguments, so
  // each character is converted to unsigned char first; plain char may
  // be signed and the input may hold bytes above 0x7F.
  span_t& trim() {
    while( p < pend     && isblank(static_cast<unsigned char>(p[0])) ) p++;
    while( p < pend - 1 && isblank(static_cast<unsigned char>(pend[-1])) ) pend--;
    return *this;
  }
 public:
  const char *p, *pend;
  // Empty span: both ends are null.
  span_t() : p(nullptr), pend(nullptr) {}

  // Span over the len characters starting at data.
  span_t( size_t len, const char *data ) : p(data), pend(data + len) {
    verify();
  }
  // Span over [data, eodata).
  span_t( const char *data, const char *eodata ) : p(data), pend(eodata) {
    verify();
  }
  // Adopt the range of a regex sub-match.  If both ends are non-null the
  // span is trimmed of surrounding blanks; otherwise it is left as copied.
  // cppcheck-suppress operatorEqRetRefThis
  span_t& operator=( const csub_match& cm ) {
    p = cm.first;
    pend = cm.second;
    return p && pend ? trim() : *this;
  }

  // Length of the span, as an int.
  int size() const { return pend - p; }

  // Number of newline characters in the span; 0 if either end is null.
  size_t nlines() const { return p && pend? std::count(p, pend, '\n') : 0; }

  // Copy the span into a freshly allocated, NUL-terminated buffer and
  // return a span over the copy (terminator excluded).  The buffer comes
  // from new[] and is not released by span_t; the caller is responsible
  // for it.
  span_t dup() const {
    auto  output = new char[size() + 1];
    auto eout = std::copy(p, pend, output);
    *eout = '\0';
    return span_t(output, eout);
  }
  // Pointer to the first NUL inside the span, or null if there is none.
  const char * has_nul() const {
    auto p_l = std::find(this->p, pend, '\0');
    return p_l != pend? p_l : nullptr;
  }

  // True if the span is non-empty and its last character is a newline.
  bool at_eol() const {
    return p < pend && '\n' == pend[-1];
  }
  // "" if the span already ends in a newline, otherwise "\n": the text
  // to append so that output of the span ends with exactly one newline.
  const char * optional_eol() const {
    return at_eol() ? "" : "\n";
  }
};

struct replace_t {
  struct span_t before, after;
  replace_t() : before(span_t()), after(span_t()) {}
  replace_t( span_t before, span_t after )
    : before(before), after(after)
  {}
  replace_t& reset() {
    before = after = span_t();
    return *this;
  }
};

#include <cstdio>
#include <list>

/*
 * Static-only class; lex_open() is the sole public entry point and the
 * remaining members are private helpers.  None of the definitions are
 * in this header.
 * NOTE(review): the per-member notes below are inferred from names and
 * signatures only -- confirm against the implementation.
 */
class cdftext {
 private:
  // Takes a file descriptor, returns a filespan_t.
  static filespan_t free_form_reference_format( int fd );
  // Takes a filespan_t by value and an int named `output` (presumably a
  // descriptor); second_pass defaults to false.
  static void process_file( filespan_t, int output, bool second_pass = false );

  // Takes a file descriptor, returns a filespan_t (presumably over the
  // mapped file contents).
  static filespan_t map_file( int fd );

  static void echo_input( int input, const char *filename );

  // Both return an int (presumably a file descriptor).
  static int open_input( const char *filename );
  static int open_output();

  // Returns a list of spans derived from mfile, which is passed by
  // reference and so may be modified.
  static std::list<span_t> segment_line( filespan_t& mfile );

 public:
  // Returns a stdio stream for the named file.
  static FILE * lex_open( const char *filename );
};

// Free function, distinct from the private static member
// cdftext::free_form_reference_format(int): this one takes a filespan_t
// by value and returns a list of replace_t.  Defined elsewhere.
std::list<replace_t> free_form_reference_format( filespan_t mfile );

#endif
